{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from __future__ import print_function\n",
    "\n",
    "import os\n",
    "import time\n",
    "import numpy as np\n",
    "import tensorflow as tf\n",
    "import pandas as pd\n",
    "from collections import defaultdict\n",
    "\n",
    "from sklearn.metrics import roc_auc_score, accuracy_score\n",
    "import nltk\n",
    "\n",
    "from correct_text import train, decode, decode_sentence, evaluate_accuracy, create_model,\\\n",
    "    get_corrective_tokens, DefaultPTBConfig, DefaultMovieDialogConfig\n",
    "from text_correcter_data_readers import PTBDataReader, MovieDialogReader\n",
    "\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Root directory holding the corpus files and the model directory. Set the\n",
    "# TEXTCORRECTER_DATA_PATH environment variable to point the notebook at another\n",
    "# location; the fallback is the directory the notebook was originally run against.\n",
    "root_data_path = os.environ.get(\n",
    "    \"TEXTCORRECTER_DATA_PATH\",\n",
    "    \"/Users/atpaino/data/textcorrecter/dialog_corpus\")\n",
    "\n",
    "# Corpus files: train_path is given to the data reader, train() and\n",
    "# get_corrective_tokens(); val_path is given to train(); test_path is given to\n",
    "# evaluate_accuracy().\n",
    "# NOTE(review): train_path is the only split that does not use a cleaned_dialog_*\n",
    "# file -- confirm this is intended.\n",
    "train_path = os.path.join(root_data_path, \"movie_lines.txt\")\n",
    "val_path = os.path.join(root_data_path, \"cleaned_dialog_val.txt\")\n",
    "test_path = os.path.join(root_data_path, \"cleaned_dialog_test.txt\")\n",
    "\n",
    "# Model location passed to train() and create_model().\n",
    "# NOTE(review): the saved create_model() output further down reports a checkpoint\n",
    "# under \"dialog_correcter_model\", not \"dialog_correcter_model_testnltk\" -- confirm\n",
    "# which directory is intended before re-running.\n",
    "model_path = os.path.join(root_data_path, \"dialog_correcter_model_testnltk\")\n",
    "\n",
    "# Configuration object shared by the data reader and create_model().\n",
    "config = DefaultMovieDialogConfig()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Data reader for the movie-dialog corpus, built from the training file with the\n",
    "# reader's default settings; it is the reader handed to train() below.\n",
    "data_reader = MovieDialogReader(config, train_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reading data; train = /Users/atpaino/data/textcorrecter/dialog_corpus/movie_lines.txt, test = /Users/atpaino/data/textcorrecter/dialog_corpus/cleaned_dialog_val.txt\n"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-4-c4f4a34e9f4a>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtrain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata_reader\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtrain_path\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mval_path\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmodel_path\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m/Users/atpaino/github/deep-text-correcter/correct_text.pyc\u001b[0m in \u001b[0;36mtrain\u001b[0;34m(data_reader, train_path, test_path, model_path)\u001b[0m\n\u001b[1;32m    138\u001b[0m         \"Reading data; train = {}, test = {}\".format(train_path, test_path))\n\u001b[1;32m    139\u001b[0m     \u001b[0mconfig\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata_reader\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 140\u001b[0;31m     \u001b[0mtrain_data\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata_reader\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuild_dataset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrain_path\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    141\u001b[0m     \u001b[0mtest_data\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata_reader\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuild_dataset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtest_path\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    142\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/Users/atpaino/github/deep-text-correcter/data_reader.pyc\u001b[0m in \u001b[0;36mbuild_dataset\u001b[0;34m(self, path)\u001b[0m\n\u001b[1;32m    125\u001b[0m         \u001b[0;31m# dropouts.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    126\u001b[0m         \u001b[0;32mfor\u001b[0m \u001b[0m_\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdataset_copies\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 127\u001b[0;31m             \u001b[0;32mfor\u001b[0m \u001b[0msource\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtarget\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_samples\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    128\u001b[0m                 for bucket_id, (source_size, target_size) in enumerate(\n\u001b[1;32m    129\u001b[0m                         self.config.buckets):\n",
      "\u001b[0;32m/Users/atpaino/github/deep-text-correcter/data_reader.pyc\u001b[0m in \u001b[0;36mread_samples\u001b[0;34m(self, path)\u001b[0m\n\u001b[1;32m    113\u001b[0m         \"\"\"\n\u001b[1;32m    114\u001b[0m         \u001b[0;32mfor\u001b[0m \u001b[0msource_words\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtarget_words\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_samples_by_string\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 115\u001b[0;31m             \u001b[0msource\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconvert_token_to_id\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mword\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mword\u001b[0m \u001b[0;32min\u001b[0m \u001b[0msource_words\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    116\u001b[0m             \u001b[0mtarget\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconvert_token_to_id\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mword\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mword\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mtarget_words\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    117\u001b[0m             \u001b[0mtarget\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mEOS_ID\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/Users/atpaino/github/deep-text-correcter/data_reader.pyc\u001b[0m in \u001b[0;36mconvert_token_to_id\u001b[0;34m(self, token)\u001b[0m\n\u001b[1;32m     77\u001b[0m         \u001b[0;34m:\u001b[0m\u001b[0;32mreturn\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     78\u001b[0m         \"\"\"\n\u001b[0;32m---> 79\u001b[0;31m         \u001b[0mtoken_with_id\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtoken\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtoken\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtoken_to_id\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     80\u001b[0m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0munknown_token\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     81\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtoken_to_id\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mtoken_with_id\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "# Run training. Per the traceback saved with this cell, train() builds its\n",
    "# datasets from the two paths via data_reader.build_dataset(), so val_path fills\n",
    "# train()'s `test_path` argument here.\n",
    "# NOTE(review): the saved output comes from a run that was interrupted by hand\n",
    "# (KeyboardInterrupt) while the training data was still being read.\n",
    "train(data_reader, train_path, val_path, model_path)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Decode sentences"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Rebind data_reader for decoding and evaluation, this time passing dropout_prob,\n",
    "# replacement_prob and dataset_copies explicitly instead of relying on defaults.\n",
    "# NOTE(review): presumably the two probabilities control the synthetic errors\n",
    "# injected into source sentences, and dataset_copies the number of passes made over\n",
    "# the file when building a dataset -- confirm in text_correcter_data_readers.\n",
    "data_reader = MovieDialogReader(config, train_path, dropout_prob=0.25, replacement_prob=0.25, dataset_copies=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Collect the corrective tokens from the training file; they are passed to\n",
    "# decode_sentence() and evaluate_accuracy() in the cells below.\n",
    "# NOTE(review): presumably the tokens the model may insert or substitute when\n",
    "# correcting a sentence -- confirm in correct_text.\n",
    "corrective_tokens = get_corrective_tokens(data_reader, train_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pickle\n",
    "\n",
    "# Persist the corrective tokens next to the corpus files.\n",
    "# pickle writes bytes, so the file is opened in binary mode: text mode (\"w\")\n",
    "# raises TypeError on Python 3 and can corrupt the stream on Windows.\n",
    "with open(os.path.join(root_data_path, \"corrective_tokens.pickle\"), \"wb\") as f:\n",
    "    pickle.dump(corrective_tokens, f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import pickle\n",
    "\n",
    "# Persist the reader's token-to-id mapping next to the corpus files.\n",
    "# pickle writes bytes, so the file is opened in binary mode: text mode (\"w\")\n",
    "# raises TypeError on Python 3 and can corrupt the stream on Windows.\n",
    "with open(os.path.join(root_data_path, \"token_to_id.pickle\"), \"wb\") as f:\n",
    "    pickle.dump(data_reader.token_to_id, f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reading model parameters from /Users/atpaino/data/textcorrecter/dialog_corpus/dialog_correcter_model/translate.ckpt-41900\n"
     ]
    }
   ],
   "source": [
    "# Open an interactive TensorFlow session and create the model inside it; the\n",
    "# saved output shows parameters being read back from an existing checkpoint.\n",
    "# NOTE(review): the positional True is presumably a forward-only/decode flag --\n",
    "# confirm against create_model's signature in correct_text.\n",
    "sess = tf.InteractiveSession()\n",
    "model = create_model(sess, True, model_path, config=config)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false,
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Input: you must have girlfriend\n",
      "Output: you must have a girlfriend\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Test a sample from the test dataset: a sentence with a missing article.\n",
    "# With the default verbosity the input and the corrected output are printed (see\n",
    "# the saved output); the returned value is kept in `decoded`.\n",
    "decoded = decode_sentence(sess, model, data_reader, \"you must have girlfriend\", corrective_tokens=corrective_tokens)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'decoded' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-7-e6ad1ea29283>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdecoded\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m: name 'decoded' is not defined"
     ]
    }
   ],
   "source": [
    "# Display the value returned by the preceding decode_sentence() call.\n",
    "# NOTE(review): the saved NameError comes from executing this cell in a kernel\n",
    "# where `decoded` had not been assigned yet (cells were run out of order).\n",
    "decoded"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Input: did n't you say that they 're going to develop this revolutionary new thing ...\n",
      "Output: did n't you say that they 're going to develop this revolutionary new thing ...\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# A sentence that needs no correction: per the saved output the model returns it\n",
    "# unchanged.\n",
    "decoded = decode_sentence(sess, model, data_reader,\n",
    "                          \"did n't you say that they 're going to develop this revolutionary new thing ...\",\n",
    "                          corrective_tokens=corrective_tokens)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['kvothe', 'went', 'to', 'the', 'market']"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Probe with a word that is presumably outside the training vocabulary (\"kvothe\",\n",
    "# TODO confirm): per the saved output the word is kept as-is and the missing \"the\"\n",
    "# is inserted. With verbose=False nothing is printed and the returned token list\n",
    "# is shown as the cell result.\n",
    "decode_sentence(sess, model, data_reader, \"kvothe went to market\", corrective_tokens=corrective_tokens, verbose=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['blablahblah', 'and', 'bladdddd', 'went', 'to', 'the', 'market']"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Same probe with two made-up words: per the saved output both are passed through\n",
    "# untouched while the missing \"the\" is still inserted.\n",
    "decode_sentence(sess, model, data_reader, \"blablahblah and bladdddd went to market\", corrective_tokens=corrective_tokens,\n",
    "                verbose=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['do', 'you', 'have', 'a', 'book']"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Missing indefinite article in a question: per the saved output the model inserts\n",
    "# \"a\" before \"book\".\n",
    "decode_sentence(sess, model, data_reader, \"do you have book\", corrective_tokens=corrective_tokens, verbose=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['the', 'cardinals', 'did', 'better', 'than', 'the', 'cubs']"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Word-substitution case rather than an insertion: per the saved output the model\n",
    "# replaces \"then\" with \"than\".\n",
    "decode_sentence(sess, model, data_reader, \"the cardinals did better then the cubs\", corrective_tokens=corrective_tokens, verbose=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Bucket 0: (10, 10)\n",
      "\tBaseline BLEU = 0.8354\n",
      "\tModel BLEU = 0.8492\n",
      "\tBaseline Accuracy: 0.9090\n",
      "\tModel Accuracy: 0.9354\n",
      "Bucket 1: (15, 15)\n",
      "\tBaseline BLEU = 0.8826\n",
      "\tModel BLEU = 0.8595\n",
      "\tBaseline Accuracy: 0.8055\n",
      "\tModel Accuracy: 0.8149\n",
      "Bucket 2: (20, 20)\n",
      "\tBaseline BLEU = 0.8880\n",
      "\tModel BLEU = 0.8216\n",
      "\tBaseline Accuracy: 0.7301\n",
      "\tModel Accuracy: 0.6689\n",
      "Bucket 3: (40, 40)\n",
      "\tBaseline BLEU = 0.9097\n",
      "\tModel BLEU = 0.6357\n",
      "\tBaseline Accuracy: 0.5981\n",
      "\tModel Accuracy: 0.2283\n"
     ]
    }
   ],
   "source": [
    "# Run label from the original author: 4 layers, 40k steps (presumably the depth and\n",
    "# training step count of the checkpoint loaded for this run -- TODO confirm).\n",
    "# The call prints baseline vs. model BLEU and accuracy for each bucket of the test\n",
    "# file (see the saved output). The trailing comment is a disabled max_samples\n",
    "# argument left in by the author.\n",
    "errors = evaluate_accuracy(sess, model, data_reader, corrective_tokens, test_path)#, max_samples=1000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Bucket 0: (10, 10)\n",
      "\tBaseline BLEU = 0.8368\n",
      "\tModel BLEU = 0.8425\n",
      "\tBaseline Accuracy: 0.9110\n",
      "\tModel Accuracy: 0.9303\n",
      "Bucket 1: (15, 15)\n",
      "\tBaseline BLEU = 0.8818\n",
      "\tModel BLEU = 0.8459\n",
      "\tBaseline Accuracy: 0.8063\n",
      "\tModel Accuracy: 0.8014\n",
      "Bucket 2: (20, 20)\n",
      "\tBaseline BLEU = 0.8891\n",
      "\tModel BLEU = 0.7986\n",
      "\tBaseline Accuracy: 0.7309\n",
      "\tModel Accuracy: 0.6281\n",
      "Bucket 3: (40, 40)\n",
      "\tBaseline BLEU = 0.9099\n",
      "\tModel BLEU = 0.5997\n",
      "\tBaseline Accuracy: 0.6007\n",
      "\tModel Accuracy: 0.1607\n"
     ]
    }
   ],
   "source": [
    "# Run label from the original author: 4 layers, 30k steps (presumably the depth and\n",
    "# training step count of the checkpoint loaded for this run -- TODO confirm).\n",
    "# The call prints baseline vs. model BLEU and accuracy for each bucket of the test\n",
    "# file (see the saved output). The trailing comment is a disabled max_samples\n",
    "# argument left in by the author.\n",
    "errors = evaluate_accuracy(sess, model, data_reader, corrective_tokens, test_path)#, max_samples=1000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Bucket 0: (10, 10)\n",
      "\tBaseline BLEU = 0.8330\n",
      "\tModel BLEU = 0.8335\n",
      "\tBaseline Accuracy: 0.9067\n",
      "\tModel Accuracy: 0.9218\n",
      "Bucket 1: (15, 15)\n",
      "\tBaseline BLEU = 0.8772\n",
      "\tModel BLEU = 0.8100\n",
      "\tBaseline Accuracy: 0.7980\n",
      "\tModel Accuracy: 0.7437\n",
      "Bucket 2: (20, 20)\n",
      "\tBaseline BLEU = 0.8898\n",
      "\tModel BLEU = 0.7636\n",
      "\tBaseline Accuracy: 0.7366\n",
      "\tModel Accuracy: 0.5370\n",
      "Bucket 3: (40, 40)\n",
      "\tBaseline BLEU = 0.9098\n",
      "\tModel BLEU = 0.5387\n",
      "\tBaseline Accuracy: 0.6041\n",
      "\tModel Accuracy: 0.1117\n"
     ]
    }
   ],
   "source": [
    "# Run label from the original author: 4 layers, 20k steps (presumably the depth and\n",
    "# training step count of the checkpoint loaded for this run -- TODO confirm).\n",
    "# The call prints baseline vs. model BLEU and accuracy for each bucket of the test\n",
    "# file (see the saved output). The trailing comment is a disabled max_samples\n",
    "# argument left in by the author.\n",
    "errors = evaluate_accuracy(sess, model, data_reader, corrective_tokens, test_path)#, max_samples=1000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Bucket 0: (10, 10)\n",
      "\tBaseline BLEU = 0.8341\n",
      "\tModel BLEU = 0.8516\n",
      "\tBaseline Accuracy: 0.9083\n",
      "\tModel Accuracy: 0.9384\n",
      "Bucket 1: (15, 15)\n",
      "\tBaseline BLEU = 0.8850\n",
      "\tModel BLEU = 0.8860\n",
      "\tBaseline Accuracy: 0.8156\n",
      "\tModel Accuracy: 0.8491\n",
      "Bucket 2: (20, 20)\n",
      "\tBaseline BLEU = 0.8876\n",
      "\tModel BLEU = 0.8880\n",
      "\tBaseline Accuracy: 0.7291\n",
      "\tModel Accuracy: 0.7817\n",
      "Bucket 3: (40, 40)\n",
      "\tBaseline BLEU = 0.9099\n",
      "\tModel BLEU = 0.9045\n",
      "\tBaseline Accuracy: 0.6073\n",
      "\tModel Accuracy: 0.6425\n"
     ]
    }
   ],
   "source": [
    "# Same evaluation call as the labelled runs above; prints baseline vs. model BLEU\n",
    "# and accuracy for each bucket of the test file.\n",
    "# NOTE(review): the model configuration behind this saved output is not recorded\n",
    "# in the notebook -- label it before relying on these numbers.\n",
    "errors = evaluate_accuracy(sess, model, data_reader, corrective_tokens, test_path)#, max_samples=1000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false,
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Decoding: you beg for mercy in a second .\n",
      "Target:   you 'll beg for mercy in a second .\n",
      "\n",
      "Decoding: i 'm dying for a shower . you could use the one too . and we 'd better check that bandage .\n",
      "Target:   i 'm dying for a shower . you could use one too . and we 'd better check that bandage .\n",
      "\n",
      "Decoding: whatever ... they 've become hotshot computer guys so they get a job to build el computer grande ... skynet ... for the government . right ?\n",
      "Target:   whatever ... they become the hotshot computer guys so they get the job to build el computer grande ... skynet ... for the government . right ?\n",
      "\n",
      "Decoding: did n't you say that they 're going to develop this revolutionary a new thing ...\n",
      "Target:   did n't you say that they 're going to develop this revolutionary new thing ...\n",
      "\n",
      "Decoding: bag some z ?\n",
      "Target:   bag some z 's ?\n",
      "\n",
      "Decoding: sleep . it 'll be a light soon .\n",
      "Target:   sleep . it 'll be light soon .\n",
      "\n",
      "Decoding: well , at least i know what to name him . i do n't suppose you 'd know who father is ? so i do n't tell him to get lost when i meet him .\n",
      "Target:   well , at least i know what to name him . i do n't suppose you 'd know who the father is ? so i do n't tell him to get lost when i meet him .\n",
      "\n",
      "Decoding: we got ta get you to doctor .\n",
      "Target:   we got ta get you to a doctor .\n",
      "\n",
      "Decoding: hunter killers . patrol machines . a build in automated factories . most of us were rounded up , put in camps ... for orderly disposal .\n",
      "Target:   hunter killers . patrol machines . build in automated factories . most of us were rounded up , put in camps ... for orderly disposal .\n",
      "\n",
      "Decoding: but outside , it 's a living human tissue . flesh , skin , hair ... blood . grown for the cyborgs .\n",
      "Target:   but outside , it 's living human tissue . flesh , skin , hair ... blood . grown for the cyborgs .\n",
      "\n",
      "Decoding: you heard enough . decide . are you going to release me ?\n",
      "Target:   you 've heard enough . decide . are you going to release me ?\n",
      "\n",
      "Decoding: okay . okay . but this ... cyborg ... if it metal ...\n",
      "Target:   okay . okay . but this ... cyborg ... if it 's metal ...\n",
      "\n",
      "Decoding: you go naked . something about the field generated by living organism . nothing dead will go .\n",
      "Target:   you go naked . something about the field generated by a living organism . nothing dead will go .\n",
      "\n",
      "Decoding: ca n't . nobody goes home . nobody else comes through . it just him and me .\n",
      "Target:   ca n't . nobody goes home . nobody else comes through . it 's just him and me .\n",
      "\n",
      "Decoding: i see . and this ... computer , thinks it can win by killing the mother of its enemy , kill- ing him , in effect , before he is even conceived ? sort of retroactive abortion ?\n",
      "Target:   i see . and this ... computer , thinks it can win by killing the mother of its enemy , kill- ing him , in effect , before he is even conceived ? a sort of retroactive abortion ?\n",
      "\n",
      "Decoding: skynet . a computer defense system built for sac-norad by cyber dynamics . modified series 4800 .\n",
      "Target:   skynet . a computer defense system built for sac-norad by cyber dynamics . a modified series 4800 .\n",
      "\n",
      "Decoding: a year 2027 ?\n",
      "Target:   the year 2027 ?\n",
      "\n",
      "Decoding: with one thirty a second under perry , from '21 to '27 --\n",
      "Target:   with the one thirty second under perry , from '21 to '27 --\n",
      "\n",
      "Decoding: why do n't you just stretch out here and get some sleep . it take your mom 's a good hour to get here from redlands .\n",
      "Target:   why do n't you just stretch out here and get some sleep . it 'll take your mom a good hour to get here from redlands .\n",
      "\n",
      "Decoding: lieutenant , are you sure it them ? maybe i should see the ... bodies .\n",
      "Target:   lieutenant , are you sure it 's them ? maybe i should see the ... bodies .\n",
      "\n",
      "Decoding: i already did . no answer at the door and the apartment manager 's out . i keeping them there .\n",
      "Target:   i already did . no answer at the door and the apartment manager 's out . i 'm keeping them there .\n",
      "\n",
      "Decoding: that stuff two hours cold .\n",
      "Target:   that stuff 's two hours cold .\n",
      "\n",
      "Decoding: you got ta be kidding me . the new guys 'll be short-stroking it over this one . one-day pattern killer .\n",
      "Target:   you got ta be kidding me . the new guys 'll be short-stroking it over this one . a one-day pattern killer .\n",
      "\n",
      "Decoding: give me a short version .\n",
      "Target:   give me the short version .\n",
      "\n",
      "Decoding: because it 's fair . give me the next quarter . if you still feel this way , vote your shares ...\n",
      "Target:   because it 's fair . give me next quarter . if you still feel this way , vote your shares ...\n",
      "\n",
      "Decoding: it 's probably will . in fact , i 'd go so far as to say it 's almost certainly will , in time . why should i settle for that ?\n",
      "Target:   it probably will . in fact , i 'd go so far as to say it almost certainly will , in time . why should i settle for that ?\n",
      "\n",
      "Decoding: stock will turn .\n",
      "Target:   the stock will turn .\n",
      "\n",
      "Decoding: you want to know what it is ? what 's it all about ? john . chapter nine . verse twenty-five .\n",
      "Target:   you want to know what it is ? what it 's all about ? john . chapter nine . verse twenty-five .\n",
      "\n",
      "Decoding: i only mention it because i took a test this afternoon , down on montgomery street .\n",
      "Target:   i only mention it because i took the test this afternoon , down on montgomery street .\n",
      "\n",
      "Decoding: christine ! mister van orton is valued customer ...\n",
      "Target:   christine ! mister van orton is a valued customer ...\n",
      "\n",
      "Decoding: a single ?\n",
      "Target:   single ?\n",
      "\n",
      "Decoding: there 's another gig starting in saudi arabia . i just a walk-on this time though . bit-part .\n",
      "Target:   there 's another gig starting in saudi arabia . i 'm just a walk-on this time though . bit-part .\n",
      "\n",
      "Decoding: no ! you take another step , i shoot ! they 're trying to kill me ...\n",
      "Target:   no ! you take another step , i 'll shoot ! they 're trying to kill me ...\n",
      "\n",
      "Decoding: listen very carefully , i 'm telling the truth ... this is a game . this was all the game .\n",
      "Target:   listen very carefully , i 'm telling the truth ... this is the game . this was all the game .\n",
      "\n",
      "Decoding: that 's gun . that 's ... that 's not automatic . the guard had an automatic ...\n",
      "Target:   that gun . that ... that 's not automatic . the guard had an automatic ...\n",
      "\n",
      "Decoding: take a picture out .\n",
      "Target:   take the picture out .\n",
      "\n",
      "Decoding: yeah . first communion . are n't i little angel ?\n",
      "Target:   yeah . first communion . are n't i a little angel ?\n",
      "\n",
      "Decoding: let me go get some clothes on . we talk , okay ? be right back .\n",
      "Target:   let me go get some clothes on . we 'll talk , okay ? be right back .\n",
      "\n",
      "Decoding: i 'm tired . i 'm sorry , i should go . i 've been enough of nuisance .\n",
      "Target:   i 'm tired . i 'm sorry , i should go . i 've been enough of a nuisance .\n",
      "\n",
      "Decoding: they said five hundred . i said six . they said man in the gray flannel suit . i think i said , you mean the attractive guy in the gray flannel suit ?\n",
      "Target:   they said five hundred . i said six . they said the man in the gray flannel suit . i think i said , you mean the attractive guy in the gray flannel suit ?\n",
      "\n",
      "Decoding: i have a confession to make . someone gave me six-hundred dollars to spill a drinks on you , as a practical joke .\n",
      "Target:   i have a confession to make . someone gave me six-hundred dollars to spill drinks on you , as a practical joke .\n",
      "\n",
      "Decoding: maitre d ' called you christine .\n",
      "Target:   the maitre d ' called you christine .\n",
      "\n",
      "Decoding: i know owner of campton place . i could talk to him in the morning .\n",
      "Target:   i know the owner of campton place . i could talk to him in the morning .\n",
      "\n",
      "Decoding: fresh shirt ...\n",
      "Target:   a fresh shirt ...\n",
      "\n",
      "Decoding: investment banking . moving money from a place to place .\n",
      "Target:   investment banking . moving money from place to place .\n",
      "\n",
      "Decoding: what 's the c .r .s . ?\n",
      "Target:   what 's c .r .s . ?\n",
      "\n",
      "Decoding: this is a c .r .s .\n",
      "Target:   this is c .r .s .\n",
      "\n",
      "Decoding: their ladder here .\n",
      "Target:   there 's a ladder here .\n",
      "\n",
      "Decoding: this is n't attempt to be gallant . if i do n't lift you , how are you going to get there ?\n",
      "Target:   this is n't an attempt to be gallant . if i do n't lift you , how are you going to get there ?\n",
      "\n",
      "Decoding: are you suggesting we wait till someone 's finds us ?\n",
      "Target:   are you suggesting we wait till someone finds us ?\n",
      "\n",
      "Decoding: `` ... wait for help . '' wait for help . i 'm not opening that specifically warns me not to .\n",
      "Target:   `` ... wait for help . '' wait for help . i 'm not opening a door that specifically warns me not to .\n",
      "\n",
      "Decoding: read what it says : `` warning , do < u > not < /u > attempt to open . if elevator stops , use the emergency ... ``\n",
      "Target:   read what it says : `` warning , do < u > not < /u > attempt to open . if elevator stops , use emergency ... ``\n",
      "\n",
      "Decoding: long story . i found this key in the mouth of wooden harlequin .\n",
      "Target:   long story . i found this key in the mouth of a wooden harlequin .\n",
      "\n",
      "Decoding: how do you know that way ?\n",
      "Target:   how do you know that 's the way ?\n",
      "\n",
      "Decoding: it 's run by company ... they play elaborate pranks . things like this . i 'm really only now finding out myself .\n",
      "Target:   it 's run by a company ... they play elaborate pranks . things like this . i 'm really only now finding out myself .\n",
      "\n",
      "Decoding: you got to be kidding .\n",
      "Target:   you 've got to be kidding .\n",
      "\n",
      "Decoding: i do n't think he breathing .\n",
      "Target:   i do n't think he 's breathing .\n",
      "\n",
      "Decoding: a bad month . you did exact the same thing to me last week .\n",
      "Target:   a bad month . you did the exact same thing to me last week .\n",
      "\n",
      "Decoding: yeah , yeah . she 's called a cab . said something about catching plane .\n",
      "Target:   yeah , yeah . she called a cab . said something about catching a plane .\n",
      "\n",
      "Decoding: oh , god yes please . thanks , man . i take you up on that .\n",
      "Target:   oh , god yes please . thanks , man . i 'll take you up on that .\n",
      "\n",
      "Decoding: this ... ? oh , this is just ... this is bill .\n",
      "Target:   this ... ? oh , this is just ... this is the bill .\n",
      "\n",
      "Decoding: baby , they were all over the house with metal detectors . they switched your gun with look-alike , rigged barrel , loaded with blanks . pop-gun .\n",
      "Target:   baby , they were all over the house with metal detectors . they switched your gun with a look-alike , rigged barrel , loaded with blanks . pop-gun .\n",
      "\n",
      "Decoding: you dodged bullet .\n",
      "Target:   you dodged a bullet .\n",
      "\n",
      "Decoding: c .r .s . who do you think ? jesus h . , thank your lucky charms . to think what i 've almost got you into .\n",
      "Target:   c .r .s . who do you think ? jesus h . , thank your lucky charms . to think what i almost got you into .\n",
      "\n",
      "Decoding: it 's profound life experience .\n",
      "Target:   it 's a profound life experience .\n",
      "\n",
      "Decoding: you 've heard of it . you 've seen other people having it . they 're entertainment service , but more than that .\n",
      "Target:   you 've heard of it . you 've seen other people having it . they 're an entertainment service , but more than that .\n",
      "\n",
      "Decoding: they make your life fun . there 's only guarantee is you will not be bored .\n",
      "Target:   they make your life fun . their only guarantee is you will not be bored .\n",
      "\n",
      "Decoding: not after i done with it . actually , i 've been here . in grad-school i bought crystal-meth from the maitre d ' .\n",
      "Target:   not after i 'm done with it . actually , i 've been here . in grad-school i bought crystal-meth from the maitre d ' .\n",
      "\n",
      "Decoding: that 's why it 's a classic . come on , man ... how 'bout hug ... ?\n",
      "Target:   that 's why it 's a classic . come on , man ... how 'bout a hug ... ?\n",
      "\n",
      "Decoding: how much is it ? a few thousand , at least . a rolex like that ... lucky for you 've missed it .\n",
      "Target:   how much is it ? a few thousand , at least . a rolex like that ... lucky for you they missed it .\n",
      "\n",
      "Decoding: i told you , they hired me over the phone . i 've never met anyone .\n",
      "Target:   i told you , they hired me over the phone . i never met anyone .\n",
      "\n",
      "Decoding: i do n't want money . i 'm pulling back curtain . i 'm here to meet the wizard .\n",
      "Target:   i do n't want money . i 'm pulling back the curtain . i 'm here to meet the wizard .\n",
      "\n",
      "Decoding: tell them the cops are after you ... tell them you got to talk to someone , i 'm threatening to blow the whistle .\n",
      "Target:   tell them the cops are after you ... tell them you 've got to talk to someone , i 'm threatening to blow the whistle .\n",
      "\n",
      "Decoding: they own the whole building . they just move from the floor to floor .\n",
      "Target:   they own the whole building . they just move from floor to floor .\n",
      "\n",
      "Decoding: look , it was just a job . nothing personal , ya know ? i play my part , improvise little . that 's what i 'm good at .\n",
      "Target:   look , it was just a job . nothing personal , ya know ? i play my part , improvise a little . that 's what i 'm good at .\n",
      "\n",
      "Decoding: that 's right -- you 're left-brain the word fetishist .\n",
      "Target:   that 's right -- you 're a left-brain word fetishist .\n",
      "\n",
      "Decoding: one guarantee . payment 's entirely at your brother discretion and , as a gift , dependent on your satisfaction .\n",
      "Target:   one guarantee . payment 's entirely at your brother 's discretion and , as a gift , dependent on your satisfaction .\n",
      "\n",
      "Decoding: your brother was a client with our branch . we do a sort of informal scoring . his numbers were outstanding . sure you 're not hungry at all ... ? tung hoy , best in chinatown ...\n",
      "Target:   your brother was a client with our london branch . we do a sort of informal scoring . his numbers were outstanding . sure you 're not hungry at all ... ? tung hoy , best in chinatown ...\n",
      "\n",
      "Decoding: key ?\n",
      "Target:   the key ?\n",
      "\n",
      "Decoding: nobody 's worried about your father .\n",
      "Target:   nobody worried about your father .\n",
      "\n",
      "Decoding: there 's been a break in . lock this door and stay here . do n't move muscle .\n",
      "Target:   there 's been a break in . lock this door and stay here . do n't move a muscle .\n",
      "\n",
      "Decoding: i do n't know what you 're talking about . what happened ?\n",
      "Target:   i do n't know what you 're talking about . what 's happened ?\n",
      "\n",
      "Decoding: did alarm go off ? the house ... they ... you did n't see ... ?\n",
      "Target:   did the alarm go off ? the house ... they ... you did n't see ... ?\n",
      "\n",
      "Decoding: then then .\n",
      "Target:   goodnight then .\n",
      "\n",
      "Decoding: okay . i think he into some sort of new personal improvement cult .\n",
      "Target:   okay . i think he 's into some sort of new personal improvement cult .\n",
      "\n",
      "Decoding: dinner in the oven .\n",
      "Target:   dinner 's in the oven .\n",
      "\n",
      "Decoding: there was incident a few days ago ... a nervous breakdown , they said . the police took him . they left this address , in case anyone ...\n",
      "Target:   there was an incident a few days ago ... a nervous breakdown , they said . the police took him . they left this address , in case anyone ...\n",
      "\n",
      "Decoding: what 's trouble ?\n",
      "Target:   what 's the trouble ?\n",
      "\n",
      "Decoding: mister ... seymour butts .\n",
      "Target:   a mister ... seymour butts .\n",
      "\n",
      "Decoding: what 's the gentleman , maria ?\n",
      "Target:   what gentleman , maria ?\n",
      "\n",
      "Decoding: i would n't mention following , except he was very insistent . it 's obviously some sort of prank ...\n",
      "Target:   i would n't mention the following , except he was very insistent . it 's obviously some sort of prank ...\n",
      "\n",
      "Decoding: i send your regrets . honestly , why must i even bother ?\n",
      "Target:   i 'll send your regrets . honestly , why must i even bother ?\n",
      "\n",
      "Decoding: the hinchberger 's wedding .\n",
      "Target:   the hinchberger wedding .\n",
      "\n",
      "Decoding: invitations : museum gala .\n",
      "Target:   invitations : the museum gala .\n",
      "\n",
      "Decoding: nice touch . does a game use real bullets ... ?\n",
      "Target:   nice touch . does the game use real bullets ... ?\n",
      "\n",
      "Decoding: it 's what they do . it 's like ... being toyed with by a bunch of ... depraved children\n",
      "Target:   it 's what they do . it 's like ... being toyed with by a bunch of ... depraved children .\n",
      "\n",
      "Decoding: find out about a company called the c .r .s . consumer recreation services .\n",
      "Target:   find out about a company called c .r .s . consumer recreation services .\n",
      "\n",
      "Decoding: someone 's playing hardball . it 's complicated . can i ask favor ?\n",
      "Target:   someone 's playing hardball . it 's complicated . can i ask a favor ?\n",
      "\n",
      "Decoding: how 's the concerned should i be ?\n",
      "Target:   how concerned should i be ?\n",
      "\n",
      "Decoding: that you 've a involved conrad ... is unforgivable . i am now your enemy .\n",
      "Target:   that you 've involved conrad ... is unforgivable . i am now your enemy .\n",
      "\n",
      "Decoding: what happened ...\n",
      "Target:   what 's happened ...\n",
      "\n",
      "Decoding: modelling small-group dynamics in formation of narrative hallucinations . you brought us here to scare us . insomnia , that was just a decoy issue . you 're disgusting .\n",
      "Target:   modelling small-group dynamics in the formation of narrative hallucinations . you brought us here to scare us . insomnia , that was just a decoy issue . you 're disgusting .\n",
      "\n",
      "Decoding: come on . these are the typically sentimental gestures of depraved industrialist .\n",
      "Target:   come on . these are the typically sentimental gestures of a depraved industrialist .\n",
      "\n",
      "Decoding: the children . children hugh crain built the house for . the children he never had .\n",
      "Target:   the children . the children hugh crain built the house for . the children he never had .\n",
      "\n",
      "Decoding: obsessive worrier . join club . and you ? i 'd guess ...\n",
      "Target:   obsessive worrier . join the club . and you ? i 'd guess ...\n",
      "\n",
      "Decoding: so why did you need the addam family mansion for a scientific test ?\n",
      "Target:   so why did you need the addam 's family mansion for a scientific test ?\n",
      "\n",
      "Decoding: -- how much is this car 's worth ?\n",
      "Target:   -- how much is this car worth ?\n",
      "\n",
      "Decoding: you do n't really believe it haunted ... do you believe in ghosts ?\n",
      "Target:   you do n't really believe it 's haunted ... do you believe in ghosts ?\n",
      "\n",
      "Decoding: so could you ! is this some fucked up the idea of art , putting someone else 's name to a painting ?\n",
      "Target:   so could you ! is this some fucked up idea of art , putting someone else 's name to a painting ?\n",
      "\n",
      "Decoding: and why did n't marrow tell < u > us < /u > ? does n't he a trust women ? that fuck .\n",
      "Target:   and why did n't marrow tell < u > us < /u > ? does n't he trust women ? that fuck .\n",
      "\n",
      "Decoding: nah , you 're going crazy with doubt , all of your mistakes are coming back up the pipes , and it 's worse than nightmare . --\n",
      "Target:   nah , you 're going crazy with doubt , all of your mistakes are coming back up the pipes , and it 's worse than a nightmare . --\n",
      "\n",
      "Decoding: not the way you 've constructed your group , it just not ethical !\n",
      "Target:   not the way you 've constructed your group , it 's just not ethical !\n",
      "\n",
      "Decoding: children want me . they 're calling me . they need me .\n",
      "Target:   the children want me . they 're calling me . they need me .\n",
      "\n",
      "Decoding: i looked at theo . she had look on her face .\n",
      "Target:   i looked at theo . she had a look on her face .\n",
      "\n",
      "Decoding: i was n't thinking about my mother bathroom .\n",
      "Target:   i was n't thinking about my mother 's bathroom .\n",
      "\n",
      "Decoding: so ... smell ... is ... smell is sense that triggers the most powerful memories . and memory can trigger a smell .\n",
      "Target:   so ... smell ... is ... smell is the sense that triggers the most powerful memories . and a memory can trigger a smell .\n",
      "\n",
      "Decoding: in the bathroom in my mother 's room , toilet was next to old wooden table . it smelled like that wood .\n",
      "Target:   in the bathroom in my mother 's room , the toilet was next to an old wooden table . it smelled like that wood .\n",
      "\n",
      "Decoding: cold sensation . who felt it first ?\n",
      "Target:   the cold sensation . who felt it first ?\n",
      "\n",
      "Decoding: i really ... honored to be part of this study , jim .\n",
      "Target:   i 'm really ... honored to be part of this study , jim .\n",
      "\n",
      "Decoding: nell . good enough . and i jim .\n",
      "Target:   nell . good enough . and i 'm jim .\n",
      "\n",
      "Decoding: that ? that 's a hill house .\n",
      "Target:   that ? that 's hill house .\n",
      "\n",
      "Decoding: here 's how they 're organized . groups of five , very different personalities : scored all over the kiersey temperament sorter just like you asked for . and they all score high on insomnia charts .\n",
      "Target:   here 's how they 're organized . groups of five , very different personalities : scored all over the kiersey temperament sorter just like you asked for . and they all score high on the insomnia charts .\n",
      "\n",
      "Decoding: you hear the vibrations in the wire . there 's magnetic pulse in the wires , you feel it . i could test it .\n",
      "Target:   you hear the vibrations in the wire . there 's a magnetic pulse in the wires , you feel it . i could test it .\n",
      "\n",
      "Decoding: but experiment was a failure .\n",
      "Target:   but the experiment was a failure .\n",
      "\n",
      "Decoding: he wandering around house , and nell heard him . she thought it was ghosts . let 's go look for him again .\n",
      "Target:   he 's wandering around the house , and nell heard him . she thought it was ghosts . let 's go look for him again .\n",
      "\n",
      "Decoding: i 'll take her with me to university tomorrow . i ca n't believe i read the test wrong . i did n't see anything that looked like she was suicidal .\n",
      "Target:   i 'll take her with me to the university tomorrow . i ca n't believe i read the test wrong . i did n't see anything that looked like she was suicidal .\n",
      "\n",
      "Decoding: no , but nell been here longer than i have .\n",
      "Target:   no , but nell 's been here longer than i have .\n",
      "\n",
      "Decoding: rene crain . up there . rope . ship 's hawser . hard to tie . do n't know how she 's got it .\n",
      "Target:   rene crain . up there . rope . ship 's hawser . hard to tie . do n't know how she got it .\n",
      "\n",
      "Decoding: mrs . dudley be waiting for you .\n",
      "Target:   mrs . dudley 'll be waiting for you .\n",
      "\n",
      "Decoding: that 's a good question . what is it about fences ? sometimes a locked chain makes people on both sides of fence just a little more comfortable . why would that be ?\n",
      "Target:   that 's a good question . what is it about fences ? sometimes a locked chain makes people on both sides of the fence just a little more comfortable . why would that be ?\n",
      "\n",
      "Decoding: well , i 've never lived with a beauty . you must love working here .\n",
      "Target:   well , i 've never lived with beauty . you must love working here .\n",
      "\n",
      "Decoding: nell , it makes sense . it 's all makes sense . you and i , we were scaring each other , working each other up .\n",
      "Target:   nell , it makes sense . it all makes sense . you and i , we were scaring each other , working each other up .\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Print each (decoding, target) pair held in `errors` as space-joined sentences.\n",
    "for decoded_tokens, target_tokens in errors:\n",
    "    decoded_sentence = \" \".join(decoded_tokens)\n",
    "    print(\"Decoding: \" + decoded_sentence)\n",
    "    target_sentence = \" \".join(target_tokens)\n",
    "    # The explicit trailing newline leaves a blank line between consecutive pairs.\n",
    "    print(\"Target:   \" + target_sentence + \"\\n\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
